Read In Data

# TRADE DATA SET UP ----------------------------------------------------------------------
# Run once the first time you open it, and never again unless you delete the files

# trade flow data url
URL <- "http://www.cepii.fr/DATA_DOWNLOAD/baci/trade_flows/BACI_HS17_V202001.zip"

# create a file on the user's desktop 
download.file(url = URL, destfile = "~/Desktop/trade_flows.zip")
# DATA READ IN ---------------------------------------------------------------------------

# unzip the file and save the .csv to the user's desktop
tradeflows <- unzip("~/Desktop/trade_flows.zip", exdir = "~/Desktop")

# read in trade flow data 
trade_flow <- read_csv(tradeflows)
## Parsed with column specification:
## cols(
##   t = col_double(),
##   i = col_double(),
##   j = col_double(),
##   k = col_character(),
##   v = col_double(),
##   q = col_double()
## )
# read in country code data 
country_codes <- read.csv("../data sets/country_codes.csv")

# read in product code data 
product_codes <- read.csv("../data sets/product_codes.csv")

Preliminary Wrangling

# DATA WRANGLING -------------------------------------------------------------------------

# remove extraneous variables 
countries <- country_codes %>%
  select(country_code, country_name_full) 

# recode levels to match with world map later (not entirely complete, add more if noticed)
countries$country_name_full <- recode(factor(countries$country_name_full), 
                                      "USA, Puerto Rico and US Virgin Islands" = "USA", 
                                      "Plurinational State of Bolivia" = "Bolivia", 
                                      "Russian Federation" = "Russia", 
                                      "France, Monaco" = "France",
                                      "Southern African Customs Union" = "South Africa", 
                                      "Viet Nam" = "Vietnam", 
                                      "United Republic of Tanzania" = "Tanzania", 
                                      "United Kingdom" = "UK", 
                                      "Norway, Svalbard and Jan Mayen" = "Norway")


# master data set with exports and imports (might be useful for networks)
trade_2018 <- trade_flow %>%
         # make variable names descriptive
  rename(year = t, product = k, exporter_code = i, importer_code = j, value = v, 
         quantity = q) %>%
  # year is always 2018 - unnecessary 
  select(-year) %>%
  # add exporter country names 
  left_join(countries, by = c("exporter_code" = "country_code")) %>%
  rename(exporter_name = country_name_full) %>%
  # add importer country names 
  left_join(countries, by = c("importer_code" = "country_code")) %>%
  rename(importer_name = country_name_full)

# separate data set for exports 
trade_exports <- trade_2018 %>%
  select(c(product, value, quantity, exporter_code, exporter_name))

# separate data set for imports 
trade_imports <- trade_2018 %>%
  select(c(product, value, quantity, importer_code, importer_name))

# proportion data sets 
whole_value <- sum(trade_2018$value)

export_prop <- trade_exports %>%
  group_by(exporter_name) %>%
  summarize(export_pct = 100*(sum(value)/whole_value))
## `summarise()` ungrouping output (override with `.groups` argument)
import_prop <- trade_imports %>%
  group_by(importer_name) %>%
  summarize(import_pct = 100*(sum(value)/whole_value))
## `summarise()` ungrouping output (override with `.groups` argument)

Interactivity With Leaflet

# MAP SETUP ------------------------------------------------------------------------------

world <- map("world", fill = TRUE, plot = FALSE)

world$names <- str_replace(world$names, pattern = ":.*$", replacement = "")

# EXPORT MAP -----------------------------------------------------------------------------

export_pct_named <- export_prop$export_pct
names(export_pct_named) <- export_prop$exporter_name

world$export_pct <- export_pct_named[world$names]

mypalette <- colorNumeric(palette = "viridis", domain = world$export_pct, 
                           na.color = "transparent")

leaflet(world) %>%
  addTiles() %>%
  setView(lat=10, lng=0 , zoom=2) %>%
  addPolygons(color = "white", fillColor = ~mypalette(export_pct), weight = 1) %>%
  addLegend(pal = mypalette, values = ~export_pct, 
            title = "Export Percent", position = "bottomleft")
# IMPORT MAP -----------------------------------------------------------------------------

import_pct_named <- import_prop$import_pct
names(import_pct_named) <- import_prop$importer_name

world$import_pct <- import_pct_named[world$names]

mypalette <- colorNumeric(palette = "viridis", domain = world$import_pct, 
                           na.color = "transparent")

leaflet(world) %>%
  addTiles() %>%
  setView(lat=10, lng=0 , zoom=2) %>%
  addPolygons(color = "white", fillColor = ~mypalette(import_pct), weight = 1) %>%
  addLegend(pal = mypalette, values = ~import_pct, 
            title = "Import Percent", position = "bottomleft")

Unsupervised learning

# Combine export + import data for clustering
trade_combine <- export_prop %>%
  inner_join(import_prop, by = c("exporter_name" = "importer_name")) %>%
  rename(country = exporter_name)

# Testing optimal number of clusters
fig <- matrix(NA, nrow=10, ncol=2)
set.seed(75)
for (i in 1:10){
  fig[i,1] <- i
  fig[i,2] <- kmeans(trade_combine[,2:3]
                    , centers=i
                    , nstart=20)$tot.withinss
}

ggplot(data = as.data.frame(fig), aes(x = V1, y = V2)) +
  geom_point() + 
  geom_line() +
  scale_x_continuous(breaks=c(1:10)) +
  labs(x = "K", y = expression("Total W"[k]))

ggplot(data = as.data.frame(fig[2:10,]), aes(x = V1, y = V2)) +
  geom_point() + 
  geom_line() +
  scale_x_continuous(breaks=c(1:10)) +
  labs(x = "K", y = expression("Total W"[k]))

# Seems like 3 clusters are best
# Use k-means clustering to identify 3 clusters based on contribution in terms of export and import to world trade
set.seed(100)
vars <- c("export_pct" , "import_pct")
km_trade <- kmeans(trade_combine[,vars], centers=3, nstart=20)

trade_combine_clust3 <- trade_combine %>%
  mutate(clust3 = as.character(km_trade$cluster))

# visualize the cluster assignments and centroids
ggplot(data = trade_combine_clust3, aes(x = import_pct, y = export_pct)) + 
  geom_point(aes(color = clust3)) +
  coord_fixed() +
  geom_point(data = as.data.frame(km_trade$centers)
             , aes(x = import_pct, y = export_pct)
             , pch = "X"
             , size = 4) +
  labs(x = "Import percentage of world import"
       , y = "Export percentage of world export" 
       , color = "Cluster Assignment")

Network

# Getting trade between countries 
trade_network <- trade_2018 %>%
  select(value, exporter_name, importer_name) %>%
  group_by(exporter_name, importer_name) %>%
  summarize(total_trade = sum(value)) 
## `summarise()` regrouping output by 'exporter_name' (override with `.groups` argument)
# Create network graph (currently too crowded)
trade_network_data <- graph_from_data_frame(trade_network
                                   , directed = TRUE)
summary(trade_network_data)
## IGRAPH 7bb9475 DN-- 221 23805 -- 
## + attr: name (v/c), total_trade (e/n)
trade_network_graph <- ggnetwork(trade_network_data)

ggplot(data = trade_network_graph
       , aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_edges(arrow=arrow(type="closed", length=unit(6,"pt"))
            , color = "lightgray") +
  geom_nodes() +
  geom_nodelabel(aes(label = name)) +
  theme_blank()

# Matching with clusters for better information
trade_network_clust <- trade_network %>%
  left_join(trade_combine_clust3, by = c("exporter_name" = "country")) %>%
  rename(clust_ex = clust3) %>%
  select(exporter_name, importer_name, total_trade, clust_ex) %>%
  left_join(trade_combine_clust3, by = c("importer_name" = "country")) %>%
  rename(clust_im = clust3) %>%
  select(exporter_name, importer_name, total_trade, clust_ex, clust_im) 

# See network within cluster 2
clust_2_network <- trade_network_clust %>%
  filter(clust_ex == 2) %>%
  filter(clust_im == 2)


clust_2_network_data <- graph_from_data_frame(clust_2_network
                                   , directed = TRUE)

clust_2_network_graph <- ggnetwork(clust_2_network_data)

ggplot(data = clust_2_network_graph
       , aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_edges(aes(color = total_trade), arrow = arrow(type = "closed", length = unit(6,"pt")), 
             curvature = 0.1, size = 0.8)  +
  geom_nodes() +
  geom_nodelabel(aes(label = name)) +
  theme_blank() +
  labs(title = "Network for countries in cluster 2", 
       color = "2017 trade volume") +
  scale_color_continuous(type = "viridis")

# See network within cluster 3 (not super useful bc there are too many countries within this)
clust_3_network <- trade_network_clust %>%
  filter(clust_ex == 3) %>%
  filter(clust_im == 3)


clust_3_network_data <- graph_from_data_frame(clust_3_network
                                   , directed = TRUE)

clust_3_network_graph <- ggnetwork(clust_3_network_data)

ggplot(data = clust_3_network_graph
       , aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_edges(aes(color = total_trade), arrow = arrow(type = "closed", length = unit(6,"pt")), 
             curvature = 0.1, size = 0.8)  +
  geom_nodes() +
  geom_nodelabel(aes(label = name)) +
  theme_blank() +
  labs(title = "Network for countries in cluster 3", 
       color = "2017 trade volume") +
  scale_color_continuous(type = "viridis")

## Trying Out Some Network Graphs

# function for an interactive network 
# thickness corresponds to the volume of trade
# you can pull the nodes towards the edge to better see the graph
# click on a node to highlight its exports
make_network <- function(network) {
  
  c.factor <- function(..., recursive=TRUE) unlist(list(...), recursive=recursive)
  
  name_vec <- name_vec <- c.factor(unique(c.factor(unique(network$exporter_name), 
                     unique(network$importer_name))))
  
  from <- network$exporter_name
  to <- network$importer_name
  trade <- network$total_trade
  
  
  edges <- data.frame(from = from, to = to, value = trade)
  nodes <- data.frame(id = name_vec, label = name_vec, group = name_vec)

  visNetwork(nodes, edges, height = "1000px", width = "100%")  %>% 
    visNodes(shape = "dot") %>%  
    visEdges(arrows = "to", arrowStrikethrough = F, smooth = T, physics = F) %>%
    visOptions(highlightNearest = list(enabled = T, degree = 0)) %>%
  visLayout(randomSeed = 17)

}

make_network(clust_2_network) 
make_network(clust_3_network)
# more generic graphs but still interactive
visIgraph(clust_2_network_data)
visIgraph(clust_3_network_data)

Rough Graphs

# read in world map data from the maps package 
world_map <- map_data(map = "world", region = ".")

#colored graph for exported percentages --------------------------------------------------

# get the data ready 
export_prop %>%
  rename(region = exporter_name) %>%
  inner_join(world_map, by = "region") %>%
# create the plot  
ggplot(aes(x = long, y = lat, group = group, fill = export_pct)) +
  geom_polygon(color = "black", alpha = 0.8) +
  theme_void() +
  coord_fixed(ratio = 1.3) +
  labs(title = "Each country's contribution to total worldwide export in percentages", 
       fill = "Export Percentage: ", 
       caption = "*Countries in white have no data*") +
  theme(legend.position = "bottom") + 
  scale_fill_viridis(option = "viridis", direction = -1) 

# colored graph for imported percentages -------------------------------------------------

# get the data ready 
import_data <- import_prop %>%
  rename(region = importer_name) %>%
  inner_join(world_map, import_prop, by = "region")
# create the plot
ggplot(import_data, aes(x = long, y = lat, group = group, fill = import_pct)) +
  geom_polygon(color = "black", alpha = 0.8) +
  theme_void() +
  coord_fixed(ratio = 1.3) +
  labs(title = "Each country's contribution to total worldwide import in percentages", 
       fill = "Import Percentage: ", 
       caption = "*Countries in white have no data*") +
  theme(legend.position="bottom") + 
  scale_fill_viridis(option = "plasma", direction = -1) 

# stuff to work on -----------------------------------------------------------------------

#try for arrows, but work on this weekend
#tradedata2018_both_direct <- graph_from_edgelist(as.matrix(tradedata2018_both[,3:4]), 
# directed = TRUE)
#
#ggplot(data = ggnetwork(tradedata2018_both_direct), aes(x = x, y = y, xend = xend, 
#yend = yend)) + 
#geom_edges(arrow=arrow(type="closed", #length=unit(6,"pt"))
#            , color = "lightgray") + geom_nodes() + geom_nodelabel(aes(label = name)) + 
#theme_blank()